# -*- coding: utf-8 -*-
"""
Reliability & Validity Assessment Script (v1.1.0)
Dataset: NIST-style synthetic data for grey-zone hybrid warfare modeling
Author: ChatGPT (generated for reproducibility)
Usage:
    python assess_reliability_validity.py
Outputs:
    - rv_summary.json : key statistics for reliability & validity checks
    - console printout of results
Notes:
    - Assumes the CSV/JSON files are colocated in the working directory.
"""

import os, json
import numpy as np
import pandas as pd
from datetime import timedelta
from scipy import stats

SEED = 20251211  # to keep bootstrap reproducible

def cohens_d(x, y):
    x, y = np.array(x), np.array(y)
    nx, ny = len(x), len(y)
    vx, vy = x.var(ddof=1), y.var(ddof=1)
    denom = ((nx-1)*vx + (ny-1)*vy)
    if (nx + ny - 2) <= 0 or denom <= 0:
        return float('nan')
    s = np.sqrt(denom / (nx + ny - 2))
    return (x.mean() - y.mean()) / s if s > 0 else float('nan')

def bootstrap_ci_stat(a, b, stat_fn, n_boot=5000, alpha=0.05, seed=SEED):
    rng = np.random.default_rng(seed)
    a = np.array(a); b = np.array(b)
    nA, nB = len(a), len(b)
    boots = []
    for _ in range(n_boot):
        sa = rng.choice(a, size=nA, replace=True)
        sb = rng.choice(b, size=nB, replace=True)
        boots.append(stat_fn(sa, sb))
    lo = float(np.percentile(boots, 100*alpha/2))
    hi = float(np.percentile(boots, 100*(1-alpha/2)))
    return lo, hi

def main():
    base = os.getcwd()
    # Load data
    mkt = pd.read_csv(os.path.join(base, "market_event_study.csv"))
    pdl = pd.read_csv(os.path.join(base, "pdl_events.csv"))
    met = pd.read_csv(os.path.join(base, "drloop_metrics.csv"))
    prov = json.load(open(os.path.join(base, "provenance.json"), "r", encoding="utf-8"))

    # --- Reliability ---
    targeted = ["Energy","Logistics","Telecom","Chemicals"]
    t0_S1 = pd.to_datetime(mkt.loc[mkt["scenario_id"]=="S1","t0"].unique()[0])
    t0_S2 = pd.to_datetime(mkt.loc[mkt["scenario_id"]=="S2","t0"].unique()[0])
    windows = [-5, -3, -1, 0, 1, 3, 5]
    dates_S1 = [(t0_S1+pd.Timedelta(days=w)).strftime("%Y-%m-%d") for w in windows]

    # (R1) Internal consistency: pairwise corr of abnormal_ret between the 2 tickers in each targeted sector across event windows
    consistency = []
    for sec in targeted:
        df = mkt[(mkt["scenario_id"]=="S1") & (mkt["sector"]==sec) & (mkt["date"].isin(dates_S1))][["date","ticker","abnormal_ret"]]
        if df["ticker"].nunique() >= 2:
            piv = df.pivot(index="date", columns="ticker", values="abnormal_ret").dropna()
            if piv.shape[0] >= 3:
                corr = piv.corr().values
                r = float(corr[0,1]) if corr.shape[0] >= 2 else float('nan')
                consistency.append({"sector": sec, "pairwise_corr_over_windows": r, "n_dates": int(piv.shape[0])})
            else:
                consistency.append({"sector": sec, "pairwise_corr_over_windows": float('nan'), "n_dates": int(piv.shape[0])})
        else:
            consistency.append({"sector": sec, "pairwise_corr_over_windows": float('nan'), "n_dates": 0})

    # (R2) Monotonic sovereign CDS for S2 at [0,1,3]
    sov = mkt[(mkt["scenario_id"]=="S2") & (mkt["sector"]=="Sovereign")].copy()
    seq = sov[sov["date"].isin([t0_S2.strftime("%Y-%m-%d"),
                                (t0_S2+pd.Timedelta(days=1)).strftime("%Y-%m-%d"),
                                (t0_S2+pd.Timedelta(days=3)).strftime("%Y-%m-%d")])]\
             .sort_values("date")["cds_bps"].values
    monotonic_up = bool(seq[0] < seq[1] < seq[2])

    # (R3) Fingerprints present
    fingerprints_ok = all("sha256" in info for info in prov["files"].values())

    # --- Validity ---
    # (V1) H1: S1 targeted vs non-targeted abnormal_ret at t0
    ar_target = mkt[(mkt["scenario_id"]=="S1") & (mkt["date"]==t0_S1.strftime("%Y-%m-%d")) & (mkt["sector"].isin(targeted))]["abnormal_ret"].dropna().values
    ar_nontarget = mkt[(mkt["scenario_id"]=="S1") & (mkt["date"]==t0_S1.strftime("%Y-%m-%d")) & (~mkt["sector"].isin(targeted)) & (mkt["sector"]!="Sovereign")]["abnormal_ret"].dropna().values
    ttest = stats.ttest_ind(ar_target, ar_nontarget, equal_var=False)
    d_eff = cohens_d(ar_target, ar_nontarget)
    # bootstrap CIs
    md_fn = lambda a,b: float(np.mean(a) - np.mean(b))
    md_lo, md_hi = bootstrap_ci_stat(ar_target, ar_nontarget, md_fn, n_boot=5000, seed=SEED)
    d_lo, d_hi = bootstrap_ci_stat(ar_target, ar_nontarget, lambda a,b: cohens_d(a,b), n_boot=5000, seed=SEED)

    # (V2) H2: lambda S2 > 0 and > lambda S1
    lambda_S1 = float(met.loc[met["scenario_id"]=="S1","credit_kill_speed_lambda_bps_per_day"].iloc[0])
    lambda_S2 = float(met.loc[met["scenario_id"]=="S2","credit_kill_speed_lambda_bps_per_day"].iloc[0])

    # (V3) Mechanism: vis*interdep vs sector AR @ t0 (S1)
    pdl_S1 = pdl[pdl["scenario_id"]=="S1"].copy()
    pdl_S1["vis_x_interdep"] = pdl_S1["visibility_weight"] * pdl_S1["interdependence_weight"]
    agg = pdl_S1.groupby("sector", as_index=False)["vis_x_interdep"].mean()
    ar_sec = mkt[(mkt["scenario_id"]=="S1") & (mkt["date"]==t0_S1.strftime("%Y-%m-%d")) & (mkt["sector"]!="Sovereign")] \
                .groupby("sector", as_index=False)["abnormal_ret"].mean()
    merge = pd.merge(agg, ar_sec, on="sector", how="inner")
    corr_mech = float(merge["vis_x_interdep"].corr(merge["abnormal_ret"])) if len(merge)>=3 else float('nan')

    # (V4) Stylized fact: mean AR (targeted) negative
    mean_ar_target = float(np.mean(ar_target))

    # Placebo test: pseudo t0 = t0-5
    placebo_date = (t0_S1 - pd.Timedelta(days=5)).strftime("%Y-%m-%d")
    ar_target_pl = mkt[(mkt["scenario_id"]=="S1") & (mkt["date"]==placebo_date) & (mkt["sector"].isin(targeted))]["abnormal_ret"].dropna().values
    ar_nontarget_pl = mkt[(mkt["scenario_id"]=="S1") & (mkt["date"]==placebo_date) & (~mkt["sector"].isin(targeted)) & (mkt["sector"]!="Sovereign")]["abnormal_ret"].dropna().values
    ttest_pl = stats.ttest_ind(ar_target_pl, ar_nontarget_pl, equal_var=False) if len(ar_target_pl)>0 and len(ar_nontarget_pl)>0 else None

    summary = {
        "meta": {
            "timestamp_utc": datetime.datetime.utcnow().isoformat(),
            "seed": SEED
        },
        "Reliability": {
            "internal_consistency_pairwise_corr_over_windows_S1": consistency,
            "sovereign_monotonic_cdss_S2_t0_1_3": monotonic_up,
            "fingerprint_coverage_in_provenance": fingerprints_ok
        },
        "Validity": {
            "H1_p_value": float(ttest.pvalue),
            "H1_cohens_d": float(d_eff),
            "H1_bootstrap_CI_mean_diff": [md_lo, md_hi],
            "H1_bootstrap_CI_cohens_d": [d_lo, d_hi],
            "H2_lambda_S1": lambda_S1,
            "H2_lambda_S2": lambda_S2,
            "mechanism_corr_visXinterdep_vs_AR": corr_mech,
            "mean_AR_targeted_S1_t0": mean_ar_target,
            "placebo_H1_p_value": (float(ttest_pl.pvalue) if ttest_pl else None)
        }
    }

    with open(os.path.join(base, "rv_summary.json"), "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)

    # Console print
    print(json.dumps(summary, indent=2, ensure_ascii=False))

if __name__ == "__main__":
    import datetime
    main()
